* ---- session setup -----------------------------------------------------------------------------
version 18.0                   // run everything below under Stata 18.0 syntax
clear all                      // clear existing data
macro drop _all                // and macros, so the script starts from a clean slate
set processors 8               // fixed core count, intended to ensure replicability across machines
set seed 20220909              // fixed random-number seed

* ---- provenance banner -------------------------------------------------------------------------
* `pgm' is the base name of this do-file; it is reused below to name the log and the figure files.
local pgm     "dst-Figure_6_Panel_C_sleep_deprivation_binscatter"
local author  "Muzhe Yang"
local created "2025-01-20"
local banner  "`pgm'.do, created by `author' on `created', last run on `c(current_date)'"

* ---- logging -----------------------------------------------------------------------------------
* Close any log left open by an earlier run, then start a fresh text log.
* The doubled backslash keeps Stata from reading \` as an escaped macro quote.
capture log close
log using "code\analysis\figures\\`pgm'.txt", text replace
display "`banner'"

**# data prep ------------------------------------------------------------------------------------

* Load the estimation data set and take a first look at distance and wave coverage.
use "data_clean\dst-data04_for_estimation_within_250_miles", clear
summarize dist_to_border
tabulate wave, missing

* Side check on a temporary copy: put the waves side by side per tract and compare
* sleep_depr across them. The working data are put back by -restore-.
preserve
    keep TractFIPS wave sleep_depr
    reshape wide sleep_depr, i(TractFIPS) j(wave)
    compare sleep_depr1 sleep_depr2   // two waves of data are identical, all from 2018
restore

* Keep only wave 2 (rows with any other, or missing, wave value are dropped).
drop if wave != 2
codebook TractFIPS

*## (1) create the 9 cells, following page 215 of the following paper:
*## Giuntella, O. and F. Mazzonna (2019). "Sunset Time and the Economic Effects of Social Jetlag: Evidence from US Time Zone Borders." Journal of Health Economics 65: 210-226.

* Both inputs to the cell definition must be present for every observation.
assert !missing(centroid_lat)
assert !missing(region)

* Cells are numbered 1-9: three latitude bands (> 40, (34, 40], <= 34) within each of the
* three border regions, in the order listed in the loop. An observation whose region matches
* none of the three strings keeps a missing cell.
generate cell = .
local offset = 0
foreach border in "eastern and central" "central and mountain" "mountain and pacific" {
    replace cell = `offset' + 1 if region == "`border'" & centroid_lat > 40
    replace cell = `offset' + 2 if region == "`border'" & centroid_lat > 34 & centroid_lat <= 40
    replace cell = `offset' + 3 if region == "`border'" & centroid_lat <= 34
    local offset = `offset' + 3
}

* One indicator variable per cell (cell_1, cell_2, ...).
quietly tabulate cell, generate(cell_)

* Latitude summaries by band: northern (1, 4, 7), middle (2, 5, 8), southern (3, 6, 9).
summarize centroid_lat if inlist(cell, 1, 4, 7)
summarize centroid_lat if inlist(cell, 2, 5, 8)
summarize centroid_lat if inlist(cell, 3, 6, 9)

*## (2) control variables

* The control list is kept in global x; it is built in three pieces only to keep lines short.
global x "pop white_pct black_pct hispanic_pct pop_under_18yr_pct pop_65yr_over_pct age_median"
global x "$x educ_hs_pct educ_coll_pct married_pct hh_size hh_income_median"
global x "$x home_value_median no_health_ins_pct unemploy_pct"

describe dist_to_border dist_to_border_signed $x centroid_lat daylight_dur_max

**# estimation prep -----------------------------------------------------------------------

* Numeric, value-labelled copy of the string county identifier.
encode CountyFIPS, generate(county_fips)

* nomiss counts the missing values per observation across the listed variables,
* so nomiss == 0 marks a complete case.
egen nomiss = rowmiss($x dist_to_border centroid_lat daylight_dur_max)

assert !missing(StateAbbr)

* Analysis sample: complete cases, within `radius' of a border, excluding Arizona.
* NOTE(review): the reason for dropping AZ is not stated in this file - confirm with the author.
local radius 50
generate sample_selection = nomiss == 0 & dist_to_border <= `radius' & StateAbbr != "AZ"

**# figure --------------------------------------------------------------------------------

* draw_rd_panel: draw one binned-mean panel of sleep_depr against dist_to_border_signed.
*
*   [if]       observations passed to -rdplot-
*   name()     name of the graph held in memory (used later by -graph combine-)
*   title()    panel title
*   radius()   half-width of the x axis; ticks run from -radius to +radius in steps of 10
*
* The program works on a preserved copy of the data, so the caller's data are unchanged.
* Both panels used to be separate copies of this code with the x-axis range hard-coded to 50;
* sharing one program and deriving the axis from radius() keeps the panels and the sample
* window (local radius) in sync.
capture program drop draw_rd_panel
program define draw_rd_panel
    version 18.0
    syntax [if], NAME(name) TITLE(string) RADIUS(integer)

    preserve

    * -rdplot- is used only to compute the bins: cutoff at 0, 15 bins on each side, a
    * degree-0 (constant) fit, uniform kernel and 95% intervals. -hide- suppresses its own
    * plot and -genvars- adds the rdplot_* variables to the data.
    rdplot sleep_depr dist_to_border_signed `if', c(0) nbins(15 15) p(0) kernel(uni) ci(95) hide genvars

    * Reduce the data to the distinct rdplot_* rows (observations outside the rdplot sample
    * have missing rdplot_id and are dropped).
    * NOTE(review): this presumes the rdplot_* values are constant within a bin, which relies
    * on p(0) - confirm against the rdplot documentation before changing the polynomial order.
    keep rdplot*
    duplicates drop
    drop if missing(rdplot_id)
    sort rdplot_id
    summarize

    twoway (rcap rdplot_ci_l rdplot_ci_r rdplot_mean_x, sort) ///
           (scatter rdplot_mean_y rdplot_mean_x, sort mcolor(blue)), ///
           xline(0, lpattern(dash) lwidth(*1) lcolor(gray)) ///
           xlabel(-`radius'(10)`radius') ///
           xtitle("distance < 0 for the control group; distance > 0 for the treatment group", size(*0.8)) ///
           legend(label(1 "the 95% confidence interval") label(2 "the binned sample mean") order(2 1) rows(1) size(*0.8) position(6)) ///
           title("`title'", size(*0.9) span) ///
           name(`name', replace) scheme(stcolor)

    restore
end

* Panel 1: all borders pooled.
draw_rd_panel if sample_selection == 1, ///
    name(all) title("All time zones in the Contiguous United States") radius(`radius')

* Panel 2: the Eastern/Central border only.
draw_rd_panel if sample_selection == 1 & region == "eastern and central", ///
    name(et_ct) title("Eastern Time Zone and Central Time Zone") radius(`radius')

* Put the two panels side by side on a common y axis. The two-line subtitle is held in
* locals only to keep the -graph combine- call readable.
local note1 "The dependent variable is defined as the prevalence of adults aged 18 years or older who report usually getting insufficient sleep (i.e., less than 7 hours, on average, during a 24-hour period)."
local note2 "The prevalence is at the census tract level, measured annually, and measured in 0–100 percentage points."

graph combine all et_ct, ///
      iscale(*0.8) ycommon ///
      title("Panel C: Dependent variable is sleep deprivation", size(*0.8) span) ///
      subtitle("`note1'" ///
               "`note2'", size(*0.5) span) ///
      scheme(stcolor)

* Save the combined graph in Stata's own format, then export it as PNG and EMF.
* The doubled backslash keeps Stata from reading \` as an escaped macro quote.
graph save "code\analysis\figures\\`pgm'", replace
foreach ext in png emf {
    graph export "code\analysis\figures\\`pgm'.`ext'", replace
}

log close
exit